In [1]:
import pandas as pd
import numpy as np
df= pd.read_excel("TweetsKetoDiet.xlsx")
df.head()

# cleaning dataset
#Drop rows with missing values
df.dropna(inplace=True)
df.head(5)
Out[1]:
Unnamed: 0 User Date Created Number of Likes Source of Tweet Tweet
0 0 ketotipslowcarb 2023-01-09 23:58:29 0 Twitter for Android Powerful New Formula Triggers Fat-Burning Keto...
1 1 gimhuij44546290 2023-01-09 23:53:04 0 Jetpack.com Keto Diet vs Mediterranean Diet – Which Is Bet...
2 2 Americareform 2023-01-09 23:50:14 1 Twitter Web App @UltraHottie2000 I understand and look up the ...
3 3 HealthLineups 2023-01-09 23:48:47 2 Twitter for iPhone Eating Keto is simple 🥑🥘 Fewer carbs, more hea...
4 4 HealthLineups 2023-01-09 23:47:55 1 Twitter for iPhone We're so grateful to all of our customers who ...
In [2]:
import re
tweets_to_df = pd.DataFrame(df['Tweet'])
tweets_to_df.head()
#clean the tweets with a function
def cleanTweets(text):
    text = re.sub('@[A-Za-z0-9_]+', '', text) #removes @mentions
    text = re.sub('#','',text) #removes hastag '#' symbol
    text = re.sub('RT[\s]+','',text)
    text = re.sub('https?:\/\/\S+', '', text) 
    text = re.sub('\n',' ',text)
    return text
tweets_to_df['cleanedTweets'] = df['Tweet'].apply(cleanTweets) #apply cleanTweet function to the tweet
tweets_to_df.tail() #compares original tweets with cleaned Tweets
Out[2]:
Tweet cleanedTweets
4996 The Healthy Mediterranean Keto Diet Cookbook :... The Healthy Mediterranean Keto Diet Cookbook :...
4997 @mikeinspiresme so true #ketodiet #Health #tre... so true ketodiet Health trend RecipeOfTheDay ...
4998 Keto Diet Cookbook For Beginners: 550 Craveabl... Keto Diet Cookbook For Beginners: 550 Craveabl...
4999 @geoffreywoo @hvmn Do you follow a keto diet? ... Do you follow a keto diet? If so , how do yo...
5000 Easy Chicken Fajita Foil Packets\n\nThis easy ... Easy Chicken Fajita Foil Packets This easy tu...
In [3]:
import nltk
nltk.download('vader_lexicon')
from nltk.sentiment.vader import SentimentIntensityAnalyzer

sid = SentimentIntensityAnalyzer()
[nltk_data] Downloading package vader_lexicon to
[nltk_data]     C:\Users\DELL\AppData\Roaming\nltk_data...
[nltk_data]   Package vader_lexicon is already up-to-date!
In [4]:
tweets_to_df['compound'] = [sid.polarity_scores(x)['compound'] for x in tweets_to_df['cleanedTweets']]
tweets_to_df['neg'] = [sid.polarity_scores(x)['neg'] for x in tweets_to_df['cleanedTweets']]
tweets_to_df['neu'] = [sid.polarity_scores(x)['neu'] for x in tweets_to_df['cleanedTweets']]
tweets_to_df['pos'] = [sid.polarity_scores(x)['pos'] for x in tweets_to_df['cleanedTweets']]
In [5]:
tweets_to_df
Out[5]:
Tweet cleanedTweets compound neg neu pos
0 Powerful New Formula Triggers Fat-Burning Keto... Powerful New Formula Triggers Fat-Burning Keto... 0.6996 0.000 0.746 0.254
1 Keto Diet vs Mediterranean Diet – Which Is Bet... Keto Diet vs Mediterranean Diet – Which Is Bet... 0.1531 0.142 0.679 0.179
2 @UltraHottie2000 I understand and look up the ... I understand and look up the keto diet cuz th... 0.3818 0.097 0.709 0.194
3 Eating Keto is simple 🥑🥘 Fewer carbs, more hea... Eating Keto is simple 🥑🥘 Fewer carbs, more hea... 0.9680 0.062 0.536 0.402
4 We're so grateful to all of our customers who ... We're so grateful to all of our customers who ... 0.9705 0.039 0.558 0.404
... ... ... ... ... ... ...
4996 The Healthy Mediterranean Keto Diet Cookbook :... The Healthy Mediterranean Keto Diet Cookbook :... 0.8834 0.000 0.553 0.447
4997 @mikeinspiresme so true #ketodiet #Health #tre... so true ketodiet Health trend RecipeOfTheDay ... 0.4754 0.000 0.781 0.219
4998 Keto Diet Cookbook For Beginners: 550 Craveabl... Keto Diet Cookbook For Beginners: 550 Craveabl... 0.0000 0.000 1.000 0.000
4999 @geoffreywoo @hvmn Do you follow a keto diet? ... Do you follow a keto diet? If so , how do yo... 0.0000 0.000 1.000 0.000
5000 Easy Chicken Fajita Foil Packets\n\nThis easy ... Easy Chicken Fajita Foil Packets This easy tu... 0.9025 0.063 0.713 0.224

5001 rows × 6 columns

In [6]:
i = 0

predicted_value = [ ] #empty series to hold our predicted values

while(i<len(tweets_to_df)):
    if ((tweets_to_df.iloc[i]['compound'] >= 0.5)):
        predicted_value.append('positive')
        i = i+1
    elif ((tweets_to_df.iloc[i]['compound'] > 0) & (tweets_to_df.iloc[i]['compound'] < 0.5)):
        predicted_value.append('neutral')
        i = i+1
    elif ((tweets_to_df.iloc[i]['compound'] <= 0)):
        predicted_value.append('negative')
        i = i+1
In [7]:
tweets_to_df['predicted sentiment'] = predicted_value
In [8]:
tweets_to_df
Out[8]:
Tweet cleanedTweets compound neg neu pos predicted sentiment
0 Powerful New Formula Triggers Fat-Burning Keto... Powerful New Formula Triggers Fat-Burning Keto... 0.6996 0.000 0.746 0.254 positive
1 Keto Diet vs Mediterranean Diet – Which Is Bet... Keto Diet vs Mediterranean Diet – Which Is Bet... 0.1531 0.142 0.679 0.179 neutral
2 @UltraHottie2000 I understand and look up the ... I understand and look up the keto diet cuz th... 0.3818 0.097 0.709 0.194 neutral
3 Eating Keto is simple 🥑🥘 Fewer carbs, more hea... Eating Keto is simple 🥑🥘 Fewer carbs, more hea... 0.9680 0.062 0.536 0.402 positive
4 We're so grateful to all of our customers who ... We're so grateful to all of our customers who ... 0.9705 0.039 0.558 0.404 positive
... ... ... ... ... ... ... ...
4996 The Healthy Mediterranean Keto Diet Cookbook :... The Healthy Mediterranean Keto Diet Cookbook :... 0.8834 0.000 0.553 0.447 positive
4997 @mikeinspiresme so true #ketodiet #Health #tre... so true ketodiet Health trend RecipeOfTheDay ... 0.4754 0.000 0.781 0.219 neutral
4998 Keto Diet Cookbook For Beginners: 550 Craveabl... Keto Diet Cookbook For Beginners: 550 Craveabl... 0.0000 0.000 1.000 0.000 negative
4999 @geoffreywoo @hvmn Do you follow a keto diet? ... Do you follow a keto diet? If so , how do yo... 0.0000 0.000 1.000 0.000 negative
5000 Easy Chicken Fajita Foil Packets\n\nThis easy ... Easy Chicken Fajita Foil Packets This easy tu... 0.9025 0.063 0.713 0.224 positive

5001 rows × 7 columns

In [10]:
tweets_to_df.to_excel('Clean.xlsx')
In [11]:
# count the occurrences of each unique value in the column
value_counts = tweets_to_df['predicted sentiment'].value_counts()

# plot the value counts as a bar chart
value_counts.plot(kind='bar')
Out[11]:
<AxesSubplot:>
In [12]:
import seaborn as sns
In [13]:
sns.countplot(x='predicted sentiment', data=tweets_to_df, hue='predicted sentiment',palette=['red', 'lightgreen', 'blue'])
Out[13]:
<AxesSubplot:xlabel='predicted sentiment', ylabel='count'>
In [14]:
tweets_to_df['predicted sentiment'].value_counts() #shows the counts of tweets' polarity
Out[14]:
negative    2130
positive    1743
neutral     1128
Name: predicted sentiment, dtype: int64
In [15]:
! pip install NRCLex
Requirement already satisfied: NRCLex in c:\users\dell\anaconda3\python2.0\lib\site-packages (3.0.0)
Requirement already satisfied: textblob in c:\users\dell\anaconda3\python2.0\lib\site-packages (from NRCLex) (0.17.1)
Requirement already satisfied: nltk>=3.1 in c:\users\dell\anaconda3\python2.0\lib\site-packages (from textblob->NRCLex) (3.7)
Requirement already satisfied: joblib in c:\users\dell\anaconda3\python2.0\lib\site-packages (from nltk>=3.1->textblob->NRCLex) (1.1.0)
Requirement already satisfied: regex>=2021.8.3 in c:\users\dell\anaconda3\python2.0\lib\site-packages (from nltk>=3.1->textblob->NRCLex) (2022.3.15)
Requirement already satisfied: click in c:\users\dell\anaconda3\python2.0\lib\site-packages (from nltk>=3.1->textblob->NRCLex) (8.0.4)
Requirement already satisfied: tqdm in c:\users\dell\anaconda3\python2.0\lib\site-packages (from nltk>=3.1->textblob->NRCLex) (4.64.0)
Requirement already satisfied: colorama in c:\users\dell\anaconda3\python2.0\lib\site-packages (from click->nltk>=3.1->textblob->NRCLex) (0.4.4)
In [16]:
import nltk
nltk.download('punkt')
[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\DELL\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!
Out[16]:
True
In [17]:
from nrclex import NRCLex
In [ ]:
 
In [18]:
text_object = NRCLex(' '.join(tweets_to_df['cleanedTweets']))
In [19]:
text_object.affect_frequencies
Out[19]:
{'fear': 0.0867430441898527,
 'anger': 0.04923253859424072,
 'anticip': 0.0,
 'trust': 0.10642721281019153,
 'surprise': 0.055734949351970625,
 'positive': 0.1991418587163268,
 'negative': 0.13239262175432387,
 'sadness': 0.09479364798513734,
 'disgust': 0.06705887556951387,
 'joy': 0.09802273632060866,
 'anticipation': 0.11045251470783386}
In [20]:
text_object.top_emotions
Out[20]:
[('positive', 0.1991418587163268)]
In [21]:
sentiment_scores = pd.DataFrame(list(text_object.raw_emotion_scores.items())) 
In [22]:
sentiment_scores = sentiment_scores.rename(columns={0: "Sentiment", 1: "Count"})
sentiment_scores
Out[22]:
Sentiment Count
0 positive 4502
1 anger 1113
2 negative 2993
3 sadness 2143
4 anticipation 2497
5 disgust 1516
6 fear 1961
7 joy 2216
8 surprise 1260
9 trust 2406
In [23]:
import plotly.express as px
In [24]:
fig = px.pie(sentiment_scores, values='Count', names='Sentiment',
             title='Sentiment Scores',
             hover_data=['Sentiment'])
fig.update_traces(textposition='inside', textinfo='percent+label')
fig.show()
In [ ]: